Prepare the data

# install.packages("psych", repos = "http://personality-project.org/r", type = "source")
# install.packages("psychTools", repos = "http://personality-project.org/r", type = "source")

# Load the relevant libraries --------------------------
library(psych)
library(psychTools)
library(tidyverse)
library(janitor)
library(readxl)
library(ggpubr)
library(kableExtra)

# Make sure you're running the most recent version of psych
# sessionInfo()
# Load the functions and data --------------------------
`%nin%` <- Negate(`%in%`)

## Load in the score data 
dryRun_ScoreExportNarrow <- read_csv("/Volumes/fsmresfiles/MSS/Research/Projects/Baby_Toolbox/Data/Combined_Dry_Run_Data/dryRun_ScoreExportNarrow_20230323.csv") %>% 
  janitor::clean_names()

# dim(dryRun_ScoreExportNarrow)
# colnames(dryRun_ScoreExportNarrow)

## Load in the item data 
dryRun_ItemExportNarrow <- read_csv("/Volumes/fsmresfiles/MSS/Research/Projects/Baby_Toolbox/Data/Combined_Dry_Run_Data/dryRun_ItemExportNarrow_20230323.csv") %>%
  janitor::clean_names()

# dim(dryRun_ItemExportNarrow)
# colnames(dryRun_ItemExportNarrow)

## Load in the age data 
dryRun_Registration_Age <- read_csv("/Volumes/fsmresfiles/MSS/Research/Projects/Baby_Toolbox/Data/Combined_Dry_Run_Data/dryRun_Registration_Age_20230323.csv") %>% 
  janitor::clean_names()

# dim(dryRun_Registration_Age)
# colnames(dryRun_Registration_Age)

## Load in the DP4 data 
dryRun_dp4 <- read_csv("/Volumes/fsmresfiles/MSS/Research/Projects/Baby_Toolbox/Data/Combined_Dry_Run_Data/dryRun_DP4_20230323.csv") %>% 
  janitor::clean_names()

# dim(dryRun_dp4) 
# colnames(dryRun_dp4)

# These are the IDs we care about for analysis
ids_for_analysis <- read_csv("data/2023-03-22T172307_shouldHavecorrected2.csv") %>% 
  rename(PIN = PINsago)

# dim(ids_for_analysis)

analysis_ids <- ids_for_analysis %>% 
  pull(PIN)

Transform the data so that we can use it

item_wide <- dryRun_ItemExportNarrow %>% 
  filter(key == "Score") %>% 
  pivot_wider(id_cols = c(pin, instrument_title),
              names_from = item_id,
              values_from = value) %>% 
  type_convert()
scores_long_df <- dryRun_ScoreExportNarrow %>% 
  filter(pin %in% analysis_ids) 

scores_long_age_df <- full_join(scores_long_df, dryRun_Registration_Age, 
                                by = c("pin", "pid", "registration_id", "assessment_name"),
                                multiple = "all")

Pull the information for doing the analysis

pull_instrument <- item_wide %>% 
  count(instrument_title) %>% 
  pull(instrument_title)

# Create a list that to save the variables into it
describe_df <- data.frame(instrument_title = NA,
                          item_id = NA,
                          n = NA,
                          mean = NA,
                          sd = NA,
                          min = NA,
                          max = NA)

# Determine if variables are poly or tetrachoric
for(i in 1:length(pull_instrument)) {
  df <- item_wide %>% 
    filter(instrument_title %in% paste(pull_instrument[i])) %>% 
    select(-c(pin))
  
  df <- df[,colSums(is.na(df))<nrow(df)]
  
  temp_describe <- describe(df, skew = FALSE) %>% 
    data.frame() %>% 
    select(-c(vars, range, se)) %>% 
    mutate(instrument_title = paste(pull_instrument[i]), 
           item_id = rownames(describe(df, skew = FALSE))) %>% 
    select(item_id, everything())
  
  row.names(temp_describe) <- NULL
  
  describe_df <- bind_rows(describe_df, temp_describe)         
  
}

describe_df <- describe_df %>% 
  filter(!is.na(instrument_title))


pull_poly <- describe_df %>% 
  filter(max > 1) %>% 
  count(instrument_title) %>% 
  # Verbal Counting only contains 1 scored item and therefore can't get a correlation
  filter(instrument_title != "Verbal Counting") %>% 
  pull(instrument_title)

pull_tetra <- describe_df %>% 
  filter(instrument_title %nin% pull_poly) %>% 
  filter(max == 1) %>% 
  filter(min == 0) %>% 
  count(instrument_title) %>% 
  pull(instrument_title)

Do the Analysis

Spearman

### Do all the polychoric measures ----

for(i in 1:length(pull_poly)) {
  df <- item_wide %>% 
    filter(instrument_title %in% paste(pull_poly[i])) %>% 
    select(-c(pin, instrument_title))
  
  df <- df[,colSums(is.na(df))<nrow(df)]
  
  cor_matrix <- df %>% 
    cor(., use = "pairwise", method = "spearman") %>% 
    round(., digits = 3)  
  
  
  cor_matrix %>% 
    cor.plot(., xlas = 3, main = paste(pull_poly[i]))
  
  #print(cor_matrix) # This needs to be prettier
}

Tetra

### Do the tetrachoric matrices ----

for(i in 1:length(pull_tetra)) {
  df <- item_wide %>% 
    filter(instrument_title %in% paste(pull_tetra[i])) %>% 
    select(-c(pin, instrument_title))
  
  df <- df[,colSums(is.na(df))<nrow(df)]
  
  cor_matrix_all <- df %>% 
    tetrachoric(., delete = FALSE, correct = FALSE) 
  
  cor_matrix <- round(cor_matrix_all$rho, digits = 3)
  
  cor_matrix %>% 
    cor.plot(., xlas = 3, main = paste(pull_tetra[i]))
  
  #print(cor_matrix) # This needs to be prettier
}

Battery Times

# Assessment Times ----
## Code what the assessment was
pull_parent_assessments <- scores_long_df %>% 
  select(test_name) %>% 
  filter(str_detect(test_name, "CBQ") |  
         str_detect(test_name, "CDI") |  
         str_detect(test_name, "Caregiver") |  
         str_detect(test_name, "IBQ") | 
           str_detect(test_name, "PROMIS")) %>% 
  unique() %>% 
  pull(test_name)

# Create a DF that will allow us to know the type of assessment
child_parent_assessment <- scores_long_df %>% 
  select(pin, registration_id, test_name, instrument_title) %>% 
  unique() %>% 
  mutate(type = ifelse(test_name %in% pull_parent_assessments, "Parent",
                       "Child")) %>% 
  select(pin, registration_id, type) %>% 
  unique()

# Figure out the 16-21 battery 
touch_gaze_df <- dryRun_ItemExportNarrow %>% 
  filter(pin %in% analysis_ids) %>% 
  filter(key == "Score") %>% 
  select(pin, registration_id, instrument_title) %>% 
  unique() %>% 
  full_join(., child_parent_assessment, by = c("pin", "registration_id")) %>%  
  filter(type == "Child") %>% 
  select(-c(type)) %>% 
  filter(instrument_title %in% c("Executive Function", "NBT Touch Screen Tutorial",
                                 "Memory Task Learning", "Memory Task Test")) %>% 
  filter(!is.na(instrument_title)) %>% 
  arrange(pin) %>% 
  mutate(battery = ifelse(instrument_title == "Executive Function", "Gaze",
                          ifelse(instrument_title == "NBT Touch Screen Tutorial", "Touch", 
                                 ifelse(instrument_title == "Memory Task Learning", "Touch", 
                                        ifelse(instrument_title == "Memory Task Test", "Touch", 
                                        NA))))) %>% 
  select(-c(instrument_title)) %>% 
  unique()

# Find the assessment times of each battery
all_battery_times <- dryRun_ItemExportNarrow %>% 
  filter(pin %in% analysis_ids) %>% 
  filter(key == "DateCreated") %>% 
  group_by(pin, registration_id) %>% 
  mutate(min_time = min(value), 
         max_time = max(value)) %>% 
  select(pin, registration_id, min_time, max_time) %>% 
  unique() %>% 
  mutate(min_time = lubridate::as_datetime(min_time),
         max_time = lubridate::as_datetime(max_time),
         diff = difftime(max_time, min_time, units = "mins")) %>% 
  ungroup() %>% 
  mutate(diff_min = str_remove(diff, " min"),
         diff_min = as.numeric(diff_min)) %>% 
  full_join(., dryRun_Registration_Age, 
          by = c("pin", "registration_id"),
          multiple = "all") %>% 
  select(pin, registration_id, total_age_in_months, min_time, max_time, diff_min) %>% 
  full_join(., child_parent_assessment, by = c("pin", "registration_id")) %>% 
  full_join(., touch_gaze_df, by = c("pin", "registration_id")) %>% 
  mutate(parent_battery = ifelse(between(total_age_in_months, 3,5), "3-5 Month", 
                                 ifelse(total_age_in_months == 6, "6 Month", 
                                        ifelse(between(total_age_in_months, 7,8), "7-8 Month", 
                                               ifelse(between(total_age_in_months, 9,12), "9-12 Month", 
                                                      ifelse(between(total_age_in_months, 13,18), "13-18 Month", 
                                                             ifelse(between(total_age_in_months, 19,30), "19-30 Month", 
                                                                    ifelse(between(total_age_in_months, 31,36), "31-36 Month", 
                                                                           ifelse(total_age_in_months >= 37, "37+ Month", NA)))))))),
         parent_battery = ifelse(type == "Parent", parent_battery, NA),
         parent_battery = as.factor(parent_battery),
         parent_battery = fct_relevel(parent_battery, "3-5 Month","6 Month", 
                                      "7-8 Month", "9-12 Month", "13-18 Month", 
                                      "19-30 Month", "31-36 Month","37+ Month"),
         child_battery = ifelse(between(total_age_in_months, 1,5), "1-5 Month",
                                ifelse(between(total_age_in_months, 6,8), "6-8 Month", 
                               ifelse(between(total_age_in_months, 9,15), "9-21 Month", 
                                ifelse(battery == "Gaze" & between(total_age_in_months, 16,21), "9-21 Month",
                                 ifelse(battery == "Touch" & between(total_age_in_months, 16,21), "22-24 Month",
                                 ifelse(between(total_age_in_months, 22,24), "22-24 Month", 
                                  ifelse(between(total_age_in_months, 25,36), "25-36 Month", 
                                   ifelse(total_age_in_months >= 37, "37+ Month", NA)))))))),
         child_battery = ifelse(type == "Child", child_battery, NA),
         child_battery = as.factor(child_battery),
         child_battery = fct_relevel(child_battery, "1-5 Month", "6-8 Month", "9-21 Month",  
                                     
                                     "22-24 Month",  "25-36 Month", "37+ Month")
         ) %>% 
  arrange(pin) %>% 
  select(-c(battery))

battery_times_child <- all_battery_times %>% 
  filter(type == "Child") %>% 
  select(-c(type)) 

battery_times_parent <- all_battery_times %>% 
  filter(type == "Parent") %>% 
  select(-c(type))

An overall graph of battery times

all_battery_times %>% 
  ggplot(aes(x = diff_min)) +
  geom_histogram() +
  facet_wrap(~type)

Child Measures: Assessment times by battery

## Child Battery -----
battery_times_child %>% 
  ggplot(aes(x = diff_min)) +
  geom_histogram() +
  facet_wrap(~child_battery) +
  labs(title = "Distribution of Timing on Child Batteries",
       x = "Time on Battery (min)",
       y = "Count")

child_battery_times <- describeBy(battery_times_child$diff_min, group = battery_times_child$child_battery,
           mat = TRUE, skew = FALSE) %>% 
  select(-c(item, vars))

rownames(child_battery_times) <- NULL

child_battery_times %>% 
  kbl(caption = "Descriptive Statistics of Child Batteries") %>% 
  kable_styling()
Descriptive Statistics of Child Batteries
group1 n mean sd min max range se
1-5 Month 6 27.81944 10.063368 14.96667 43.60000 28.63333 4.108353
6-8 Month 7 44.53333 4.414968 38.83333 49.80000 10.96667 1.668701
9-21 Month 28 66.00060 14.446816 45.38333 100.01667 54.63333 2.730192
22-24 Month 8 68.18125 14.093983 50.31667 91.23333 40.91667 4.982975
25-36 Month 20 59.02333 6.623665 47.68333 69.53333 21.85000 1.481097
37+ Month 14 72.24881 9.510726 58.81667 90.50000 31.68333 2.541848

Parent Measures: Assessment times by battery

## Parent Battery -----
battery_times_parent %>% 
  ggplot(aes(x = diff_min)) +
  geom_histogram() +
  facet_wrap(~parent_battery) +
  labs(title = "Distribution of Timing on Parent Batteries",
       x = "Time on Battery (min)",
       y = "Count")

parent_battery_times <- describeBy(battery_times_parent$diff_min, group = battery_times_parent$parent_battery,
           mat = TRUE, skew = FALSE) %>% 
  select(-c(item, vars))

rownames(parent_battery_times) <- NULL

parent_battery_times %>% 
  kbl(caption = "Descriptive Statistics of Parent Batteries") %>% 
  kable_styling()
Descriptive Statistics of Parent Batteries
group1 n mean sd min max range se
3-5 Month 5 7.68000 5.565362 4.116667 17.46667 13.35000 2.488905
6 Month 2 15.20000 9.805214 8.266667 22.13333 13.86667 6.933333
7-8 Month 5 14.31667 7.732651 7.050000 24.78333 17.73333 3.458147
9-12 Month 10 19.00333 12.423983 4.950000 40.10000 35.15000 3.928808
13-18 Month 11 27.54848 16.862303 8.233333 55.86667 47.63333 5.084175
19-30 Month 22 16.18182 9.255626 4.466667 47.15000 42.68333 1.973306
31-36 Month 10 15.31333 7.160436 4.733333 25.80000 21.06667 2.264329
37+ Month 14 14.05000 4.575067 9.016667 27.01667 18.00000 1.222738